####
## Project:            Do Corporate Regulations Deter or Stimulate Investment? The Effect of the OECD Anti-Bribery Convention on FDI
## File name:          aux_functions.R
## Description:        Programs functions used by various R scripts in the project
## Programmed by:      Lorenzo Crippa. Other sources specified below.
####

# not in operator ----
`%!in%` <- Negate(`%in%`)

# recenter() ----
# a function to recenter a vector on its mean
recenter <- function(v){
  new.vector <- v - mean(v, na.rm = T)
  return(new.vector)
}

# scales() ----
# function to rescale labels on y axes in ggplots
scales <- function(x) {
  perc.value <- x * 100
  digits <- sprintf("%.1f", perc.value)
  out <- paste0(digits, "%")
  return(out)
}

# marginal effect() ----
# function to estimate the effect of a change from oecdABC = 0 to oecdABC = 1 in predicted
# probability of investing, conditional on levels of host country corruption. 
# The function computes confidence intervals of the point estimates employing a simulation method. 
# I have modified the function proposed in replication materials from Beazer and Blake (2018). With my changes, 
# the user needs to specify the model, the range of the moderator variable, and can specify the number of simulations 
# (1000 by default) and the desired level of the confidence intervals. 
# The function doesn't plot anything automatically, but exports a data frame that the user can employ to plot using ggplot().
# Also, the function works with models having any number of covariates
marg_eff <- function(model, range, seed = 1234, nsims = 1000, ci.lvl = .95){
  set.seed(seed)
  S <- sim(model, n.sims = nsims)
  d.lo <- 0
  d.hi <- 1 
  x.sim <- range
  
  # create two profiles: one corresponding to treated, one corresponding to control.
  # Make the X range over the full spectrum of observed moderator
  
  vars <- rownames(summary(model)$coefficients)
  
  # loop over variables to create the profiles. Keep covariates at 0 (i.e., mean: we recentered them)
  for (v in vars) {
    if (v == '(Intercept)'){
      profile.d.hi <- rep(1, length(x.sim))
      profile.d.lo <- rep(1, length(x.sim))
    } else if (v == 'oecdABC') {
      profile.d.hi <- cbind(profile.d.hi, d.hi)
      profile.d.lo <- cbind(profile.d.lo, d.lo)
    } else if (v %in% c('paciHost2005', 'paciHost', 'v2exbribeHost', 'cceHost')) {
      profile.d.hi <- cbind(profile.d.hi, x.sim)
      profile.d.lo <- cbind(profile.d.lo, x.sim)
    } else if (v %in% c('I(paciHost2005 * paciHost2005)', 'I(paciHost * paciHost)', 
                        'I(v2exbribeHost * v2exbribeHost)', 'I(cceHost * cceHost)')) {
      profile.d.hi <- cbind(profile.d.hi, x.sim^2)
      profile.d.lo <- cbind(profile.d.lo, x.sim^2)
      
    } else if (v %in% c('oecdABC:paciHost2005', 'oecdABC:paciHost', 'oecdABC:v2exbribeHost', 'oecdABC:cceHost')) {
      profile.d.hi <- cbind(profile.d.hi, d.hi*x.sim)
      profile.d.lo <- cbind(profile.d.lo, d.lo*x.sim)
    } else if (v %in% c('oecdABC:I(paciHost2005 * paciHost2005)', 'oecdABC:I(paciHost * paciHost)', 
                        'oecdABC:I(v2exbribeHost * v2exbribeHost)', 'oecdABC:I(cceHost * cceHost)')) {
      profile.d.hi <- cbind(profile.d.hi, d.hi*(x.sim^2))
      profile.d.lo <- cbind(profile.d.lo, d.lo*(x.sim^2))
    } else {
      profile.d.hi <- cbind(profile.d.hi, 0)
      profile.d.lo <- cbind(profile.d.lo, 0)
    }
    
  }
  
  # simulate CIs for both profiles:
  alpha <- 1 - ci.lvl
  sim.hi.mat.ci <- matrix(rep(NA,length(x.sim)*nsims),nrow=length(x.sim))
  for (i in 1:nsims){ 
    sim.hi.mat.ci[,i] <- invlogit(profile.d.hi %*% fixef(S)[i,]) 
  }
  
  sim.hi.ci <- matrix(rep(NA,length(x.sim)*2),nrow=length(x.sim))
  for (j in 1:length(x.sim)){
    sim.hi.ci[j,1] <- quantile(sim.hi.mat.ci[j,],(alpha/2))
    sim.hi.ci[j,2] <- quantile(sim.hi.mat.ci[j,],(ci.lvl + alpha/2))
  }
  
  sim.lo.mat.ci <- matrix(rep(NA,length(x.sim)*nsims),nrow=length(x.sim))
  for (i in 1:nsims){
    sim.lo.mat.ci[,i] <-  invlogit(profile.d.lo %*% fixef(S)[i,])
  }
  
  sim.lo.ci <- matrix(rep(NA,length(x.sim)*2),nrow=length(x.sim))
  for (j in 1:length(x.sim)){
    sim.lo.ci[j,1] <- quantile(sim.lo.mat.ci[j,],(alpha/2))
    sim.lo.ci[j,2] <- quantile(sim.lo.mat.ci[j,],(ci.lvl + alpha/2))
  }
  
  # effect in terms of change in probability:
  diff.mat <- sim.hi.mat.ci-sim.lo.mat.ci
  diff.ci <- matrix(rep(NA,length(x.sim)*3),nrow=length(x.sim))
  
  for (j in 1:length(x.sim)){
    diff.ci[j,1] <- quantile(diff.mat[j,],(alpha/2))
    diff.ci[j,2] <- mean(diff.mat[j,], na.rm = T)
    diff.ci[j,3] <- quantile(diff.mat[j,],(ci.lvl + alpha/2))
  }
  
  
  # express them in pct terms too (if needed for plotting)
  diffpct.mat <- (sim.hi.mat.ci-sim.lo.mat.ci)/sim.lo.mat.ci*100
  diffpct.ci <- matrix(rep(NA,length(x.sim)*3),nrow=length(x.sim))
  
  for (j in 1:length(x.sim)){
    diffpct.ci[j,1] <- quantile(diffpct.mat[j,],(alpha/2))
    diffpct.ci[j,2] <- mean(diffpct.mat[j,], na.rm = T)
    diffpct.ci[j,3] <- quantile(diffpct.mat[j,],(ci.lvl + alpha/2))
  }
  
  # return values computed in a DF:
  df <- data.frame(
    x = x.sim,
    change = diffpct.ci[,2],
    lower = diffpct.ci[,1],
    upper = diffpct.ci[,3]
  )
  return(df)
}

# shift_legend() ----
shift_legend <- function(p){
  
  # check if p is a valid object
  if(!"gtable" %in% class(p)){
    if("ggplot" %in% class(p)){
      gp <- ggplotGrob(p) # convert to grob
    } else {
      message("This is neither a ggplot object nor a grob generated from ggplotGrob. Returning original plot.")
      return(p)
    }
  } else {
    gp <- p
  }
  
  # check for unfilled facet panels
  facet.panels <- grep("^panel", gp[["layout"]][["name"]])
  empty.facet.panels <- sapply(facet.panels, function(i) "zeroGrob" %in% class(gp[["grobs"]][[i]]))
  empty.facet.panels <- facet.panels[empty.facet.panels]
  if(length(empty.facet.panels) == 0){
    message("There are no unfilled facet panels to shift legend into. Returning original plot.")
    return(p)
  }
  
  # establish extent of unfilled facet panels (including any axis cells in between)
  empty.facet.panels <- gp[["layout"]][empty.facet.panels, ]
  empty.facet.panels <- list(min(empty.facet.panels[["t"]]), min(empty.facet.panels[["l"]]),
                             max(empty.facet.panels[["b"]]), max(empty.facet.panels[["r"]]))
  names(empty.facet.panels) <- c("t", "l", "b", "r")
  
  # extract legend & copy over to location of unfilled facet panels
  guide.grob <- which(gp[["layout"]][["name"]] == "guide-box")
  if(length(guide.grob) == 0){
    message("There is no legend present. Returning original plot.")
    return(p)
  }
  gp <- gtable_add_grob(x = gp,
                        grobs = gp[["grobs"]][[guide.grob]],
                        t = empty.facet.panels[["t"]],
                        l = empty.facet.panels[["l"]],
                        b = empty.facet.panels[["b"]],
                        r = empty.facet.panels[["r"]],
                        name = "new-guide-box")
  
  # squash the original guide box's row / column (whichever applicable)
  # & empty its cell
  guide.grob <- gp[["layout"]][guide.grob, ]
  if(guide.grob[["l"]] == guide.grob[["r"]]){
    gp <- gtable_squash_cols(gp, cols = guide.grob[["l"]])
  }
  if(guide.grob[["t"]] == guide.grob[["b"]]){
    gp <- gtable_squash_rows(gp, rows = guide.grob[["t"]])
  }
  gp <- gtable_remove_grobs(gp, "guide-box")
  
  return(gp)
}